In [1]:
import pyspark
from pyspark import SparkConf
from pyspark.sql import SparkSession
In [2]:
spark = SparkSession \
    .builder \
    .appName("Dadosfera case study") \
    .config("spark.some.config.option", "some-value") \
    .getOrCreate()
In [3]:
spark.sparkContext.getConf().getAll()
Out[3]:
[('spark.some.config.option', 'some-value'),
 ('spark.app.id', 'local-1645194382937'),
 ('spark.executor.id', 'driver'),
 ('spark.app.name', 'Dadosfera case study'),
 ('spark.driver.port', '56884'),
 ('spark.rdd.compress', 'True'),
 ('spark.serializer.objectStreamReset', '100'),
 ('spark.app.startTime', '1645194380930'),
 ('spark.master', 'local[*]'),
 ('spark.submit.pyFiles', ''),
 ('spark.submit.deployMode', 'client'),
 ('spark.driver.host', 'DESKTOP-MGDJI74'),
 ('spark.sql.warehouse.dir', 'file:/C:/Users/Usuario/spark-warehouse'),
 ('spark.ui.showConsoleProgress', 'true')]
In [4]:
spark
Out[4]:

SparkSession - in-memory

SparkContext

Spark UI

Version
v3.2.1
Master
local[*]
AppName
Dadosfera case study
In [5]:
folder = "C:\\Users\\Usuario\\Downloads\\data_trips"
In [6]:
import glob

txtfiles = []
for file in glob.glob(folder+"\\*"):
    txtfiles.append(file)
In [7]:
txtfiles
Out[7]:
['C:\\Users\\Usuario\\Downloads\\data_trips\\data-sample_data-nyctaxi-trips-2009-json_corrigido.json',
 'C:\\Users\\Usuario\\Downloads\\data_trips\\data-sample_data-nyctaxi-trips-2010-json_corrigido.json',
 'C:\\Users\\Usuario\\Downloads\\data_trips\\data-sample_data-nyctaxi-trips-2011-json_corrigido.json',
 'C:\\Users\\Usuario\\Downloads\\data_trips\\data-sample_data-nyctaxi-trips-2012-json_corrigido.json']
In [8]:
begin = True
for file in txtfiles:
    if begin:
        df = spark.read.json(file)
        begin = False
    else:
        df = df.union(spark.read.json(file))
In [9]:
df.createOrReplaceTempView("trips")
In [22]:
first_answer = spark.sql("SELECT trip_distance, passenger_count FROM trips WHERE passenger_count <= 2").toPandas()
In [23]:
first_answer
Out[23]:
trip_distance passenger_count
0 0.80 2
1 5.40 1
2 4.13 1
3 2.03 1
4 1.15 1
... ... ...
3319647 1.40 1
3319648 1.60 2
3319649 1.80 1
3319650 1.70 2
3319651 2.00 1

3319652 rows × 2 columns

In [12]:
import plotly.express as px
In [ ]:
# fig = px.box(first_answer, y="trip_distance")
# fig.show()
In [14]:
import holoviews as hv
In [16]:
points = hv.Points(first_answer, 'trip_distance')
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_8536/1089732015.py in <module>
----> 1 points = hv.Points(first_answer, 'trip_distance')

~\anaconda3\lib\site-packages\holoviews\element\selection.py in __init__(self, *args, **kwargs)
     21 
     22     def __init__(self, *args, **kwargs):
---> 23         super(SelectionIndexExpr, self).__init__(*args, **kwargs)
     24         self._index_skip = False
     25 

~\anaconda3\lib\site-packages\holoviews\core\data\__init__.py in __init__(self, data, kdims, vdims, **kwargs)
    342                                            datatype=kwargs.get('datatype'))
    343         (data, self.interface, dims, extra_kws) = initialized
--> 344         super(Dataset, self).__init__(data, **dict(kwargs, **dict(dims, **extra_kws)))
    345         self.interface.validate(self, validate_vdims)
    346 

~\anaconda3\lib\site-packages\holoviews\core\dimension.py in __init__(self, data, kdims, vdims, **params)
    852             params['cdims'] = {d if isinstance(d, Dimension) else Dimension(d): val
    853                                for d, val in params['cdims'].items()}
--> 854         super(Dimensioned, self).__init__(data, **params)
    855         self.ndims = len(self.kdims)
    856         cdims = [(d.name, val) for d, val in self.cdims.items()]

~\anaconda3\lib\site-packages\holoviews\core\dimension.py in __init__(self, data, id, plot_id, **params)
    511             params['group'] = long_name
    512 
--> 513         super(LabelledData, self).__init__(**params)
    514         if not util.group_sanitizer.allowable(self.group):
    515             raise ValueError("Supplied group %r contains invalid characters." %

~\anaconda3\lib\site-packages\param\parameterized.py in __init__(self, **params)
   3144 
   3145         self.param._generate_name()
-> 3146         self.param._setup_params(**params)
   3147         object_count += 1
   3148 

~\anaconda3\lib\site-packages\param\parameterized.py in override_initialization(self_, *args, **kw)
   1361         original_initialized = parameterized_instance.initialized
   1362         parameterized_instance.initialized = False
-> 1363         fn(parameterized_instance, *args, **kw)
   1364         parameterized_instance.initialized = original_initialized
   1365     return override_initialization

~\anaconda3\lib\site-packages\param\parameterized.py in _setup_params(self_, **params)
   1614                 self.param.warning("Setting non-parameter attribute %s=%s using a mechanism intended only for parameters", name, val)
   1615             # i.e. if not desc it's setting an attribute in __dict__, not a Parameter
-> 1616             setattr(self, name, val)
   1617 
   1618     # PARAM2_DEPRECATION: Backwards compatibilitity for param<1.12

~\anaconda3\lib\site-packages\param\parameterized.py in _f(self, obj, val)
    351             instance_param.__set__(obj, val)
    352             return
--> 353         return f(self, obj, val)
    354     return _f
    355 

~\anaconda3\lib\site-packages\param\parameterized.py in __set__(self, obj, val)
   1175             val = self.set_hook(obj,val)
   1176 
-> 1177         self._validate(val)
   1178 
   1179         _old = NotImplemented

~\anaconda3\lib\site-packages\param\__init__.py in _validate(self, val)
   1396         """
   1397         self._validate_value(val, self.allow_None)
-> 1398         self._validate_bounds(val, self.bounds)
   1399         self._validate_item_type(val, self.item_type)
   1400 

~\anaconda3\lib\site-packages\param\__init__.py in _validate_bounds(self, val, bounds)
   1407         if min_length is not None and max_length is not None:
   1408             if not (min_length <= l <= max_length):
-> 1409                 raise ValueError("%s: list length must be between %s and %s (inclusive)"%(self.name,min_length,max_length))
   1410         elif min_length is not None:
   1411             if not min_length <= l:

ValueError: kdims: list length must be between 2 and 2 (inclusive)
In [18]:
import pandas as pd
import holoviews as hv

from holoviews.operation.timeseries import rolling, rolling_outlier_std

hv.extension('bokeh')
In [29]:
first_answer
Out[29]:
trip_distance passenger_count
0 0.80 2
1 5.40 1
2 4.13 1
3 2.03 1
4 1.15 1
... ... ...
3319647 1.40 1
3319648 1.60 2
3319649 1.80 1
3319650 1.70 2
3319651 2.00 1

3319652 rows × 2 columns

In [30]:
first_answer['trip_distance'].mean()
Out[30]:
2.6625269962030926
In [25]:
import seaborn as sns
sns.set_theme(style="whitegrid")
ax = sns.boxplot(y=first_answer["trip_distance"])
In [28]:
first_answer.describe()
Out[28]:
trip_distance passenger_count
count 3.319652e+06 3.319652e+06
mean 2.662527e+00 1.219706e+00
std 3.112605e+00 4.143705e-01
min 0.000000e+00 0.000000e+00
25% 1.000000e+00 1.000000e+00
50% 1.680000e+00 1.000000e+00
75% 3.000000e+00 1.000000e+00
max 4.970000e+01 2.000000e+00
In [27]:
sns.histplot(data=first_answer, x="trip_distance", kde=True)
Out[27]:
<AxesSubplot:xlabel='trip_distance', ylabel='Count'>
In [31]:
sns.histplot(data=first_answer, x="trip_distance", kde=True, hue="passenger_count")
Out[31]:
<AxesSubplot:xlabel='trip_distance', ylabel='Count'>
In [33]:
first_answer[first_answer['passenger_count']==0]
Out[33]:
trip_distance passenger_count
6257 0.0 0
16334 0.6 0
18399 0.6 0
27688 0.0 0
51794 0.6 0
... ... ...
3284510 6.0 0
3290893 1.5 0
3294944 0.6 0
3297524 1.5 0
3299504 2.1 0

444 rows × 2 columns

In [34]:
sns.histplot(data=first_answer[first_answer['passenger_count']==0], x="trip_distance", kde=True)
Out[34]:
<AxesSubplot:xlabel='trip_distance', ylabel='Count'>
In [44]:
sqlDF = spark.sql("SELECT vendor_id, sum(total_amount) as total, count(total_amount) as count FROM trips group by vendor_id order by total desc")
sqlDF.show()
+---------+--------------------+-------+
|vendor_id|               total|  count|
+---------+--------------------+-------+
|      CMT| 1.954908428000008E7|1916128|
|      VTS|1.9043433999999914E7|1833052|
|      DDS|  2714901.7200000025| 250816|
|       TS|                45.6|      4|
+---------+--------------------+-------+

In [45]:
sqlDF = spark.sql("SELECT * FROM trips WHERE vendor_id = 'TS'")
sqlDF.show()
+--------------------+----------------+-----------------+-----------+---------------+------------+--------------------+---------------+----------------+---------+------------------+---------+----------+------------+------------+-------------+---------+
|    dropoff_datetime|dropoff_latitude|dropoff_longitude|fare_amount|passenger_count|payment_type|     pickup_datetime|pickup_latitude|pickup_longitude|rate_code|store_and_fwd_flag|surcharge|tip_amount|tolls_amount|total_amount|trip_distance|vendor_id|
+--------------------+----------------+-----------------+-----------+---------------+------------+--------------------+---------------+----------------+---------+------------------+---------+----------+------------+------------+-------------+---------+
|2009-08-22T14:45:...|       40.787885|       -73.954648|       10.9|              2|        CASH|2009-08-22T14:33:...|       40.74291|       -73.98244|     null|              null|      0.5|       0.0|         0.0|        11.4|          3.8|       TS|
|2010-01-28T19:10:...|       40.787885|       -73.954648|       10.9|              2|        CASH|2010-01-28T19:00:...|       40.74291|       -73.98244|     null|              null|      0.5|       0.0|         0.0|        11.4|          3.8|       TS|
|2011-10-18T20:27:...|       40.787885|       -73.954648|       10.9|              2|        CASH|2011-10-18T20:19:...|       40.74291|       -73.98244|     null|              null|      0.5|       0.0|         0.0|        11.4|          3.8|       TS|
|2012-07-11T10:11:...|       40.787885|       -73.954648|       10.9|              2|        CASH|2012-07-11T10:04:...|       40.74291|       -73.98244|     null|              null|      0.5|       0.0|         0.0|        11.4|          3.8|       TS|
+--------------------+----------------+-----------------+-----------+---------------+------------+--------------------+---------------+----------------+---------+------------------+---------+----------+------------+------------+-------------+---------+

In [46]:
test = spark.sql("SELECT * FROM trips WHERE vendor_id = 'TS'").toPandas()
In [47]:
test
Out[47]:
dropoff_datetime dropoff_latitude dropoff_longitude fare_amount passenger_count payment_type pickup_datetime pickup_latitude pickup_longitude rate_code store_and_fwd_flag surcharge tip_amount tolls_amount total_amount trip_distance vendor_id
0 2009-08-22T14:45:03.951506+00:00 40.787885 -73.954648 10.9 2 CASH 2009-08-22T14:33:16.751404+00:00 40.74291 -73.98244 None NaN 0.5 0.0 0.0 11.4 3.8 TS
1 2010-01-28T19:10:06.358666+00:00 40.787885 -73.954648 10.9 2 CASH 2010-01-28T19:00:36.795723+00:00 40.74291 -73.98244 None NaN 0.5 0.0 0.0 11.4 3.8 TS
2 2011-10-18T20:27:33.827441+00:00 40.787885 -73.954648 10.9 2 CASH 2011-10-18T20:19:22.258244+00:00 40.74291 -73.98244 None NaN 0.5 0.0 0.0 11.4 3.8 TS
3 2012-07-11T10:11:00.242403+00:00 40.787885 -73.954648 10.9 2 CASH 2012-07-11T10:04:38.066432+00:00 40.74291 -73.98244 None NaN 0.5 0.0 0.0 11.4 3.8 TS
In [48]:
sqlDF = spark.sql("SELECT vendor_id, sum(total_amount + tip_amount + surcharge - tolls_amount) as total, count(total_amount) as count FROM trips group by vendor_id order by total desc")
sqlDF.show()
+---------+--------------------+-------+
|vendor_id|               total|  count|
+---------+--------------------+-------+
|      VTS|2.0189957039999817E7|1833052|
|      CMT|2.0051976479999762E7|1916128|
|      DDS|   2853020.600000002| 250816|
|       TS|                47.6|      4|
+---------+--------------------+-------+

In [49]:
sqlDF = spark.sql("SELECT distinct(payment_type) FROM trips")
sqlDF.show()
+------------+
|payment_type|
+------------+
|   No Charge|
|        CASH|
|      Credit|
|        Cash|
|     Dispute|
|      CREDIT|
+------------+

In [55]:
sqlDF = spark.sql("SELECT sum(total_amount), count(total_amount), MONTH(pickup_datetime) as month, YEAR(pickup_datetime) as year FROM trips group by year, month order by year, month")
sqlDF.show()
+-----------------+-------------------+-----+----+
|sum(total_amount)|count(total_amount)|month|year|
+-----------------+-------------------+-----+----+
|848481.0900000015|              82415|    1|2009|
|849284.3299999984|              82192|    2|2009|
|945950.0300000007|              91353|    3|2009|
|904153.0399999974|              87787|    4|2009|
|943034.3999999976|              91211|    5|2009|
|907902.9799999993|              88094|    6|2009|
|942157.6100000003|              91234|    7|2009|
|945951.6600000031|              91331|    8|2009|
|910582.5799999982|              88331|    9|2009|
|942709.1199999984|              91293|   10|2009|
|912818.9599999993|              88288|   11|2009|
|273840.6000000002|              26471|   12|2009|
|841325.4200000009|              81673|    1|2010|
|850524.2200000021|              82350|    2|2010|
|936354.5600000024|              90511|    3|2010|
|908271.2899999989|              87657|    4|2010|
|        935763.11|              90728|    5|2010|
|910471.2899999971|              87731|    6|2010|
|933746.3699999999|              90602|    7|2010|
|932630.0399999997|              90387|    8|2010|
+-----------------+-------------------+-----+----+
only showing top 20 rows

In [60]:
test = spark.sql("SELECT sum(total_amount), count(total_amount), MONTH(pickup_datetime) as month, YEAR(pickup_datetime) as year FROM trips WHERE payment_type in ('Cash', 'CASH') group by year, month order by year, month").toPandas()
In [61]:
test
Out[61]:
sum(total_amount) count(total_amount) month year
0 627267.13 66824 1 2009
1 625186.60 66680 2 2009
2 696467.42 73948 3 2009
3 667394.58 71142 4 2009
4 698388.04 74151 5 2009
5 669432.45 71342 6 2009
6 697615.20 74076 7 2009
7 697139.14 74021 8 2009
8 671736.14 71511 9 2009
9 695397.38 74011 10 2009
10 670152.72 71424 11 2009
11 204377.30 21583 12 2009
12 622337.46 66179 1 2010
13 624890.13 66837 2 2010
14 695007.47 73616 3 2010
15 671593.76 71075 4 2010
16 690537.78 73663 5 2010
17 670187.13 70987 6 2010
18 690775.09 73487 7 2010
19 688273.39 73358 8 2010
20 665003.80 71113 9 2010
21 692699.55 73648 10 2010
22 666169.22 70876 11 2010
23 243079.32 25874 12 2010
24 520871.44 55570 1 2011
25 667657.01 70990 2 2011
26 734770.64 78285 3 2011
27 714801.75 76122 4 2011
28 736945.01 78591 5 2011
29 711644.71 75734 6 2011
30 740846.28 78713 7 2011
31 745906.48 78855 8 2011
32 715333.65 76246 9 2011
33 740357.19 78580 10 2011
34 591419.94 63027 11 2011
35 759740.19 80926 1 2012
36 735060.27 78451 2 2012
37 783597.32 83368 3 2012
38 764028.96 81298 4 2012
39 789521.00 83935 5 2012
40 760125.42 80814 6 2012
41 791345.04 84421 7 2012
42 785371.96 83431 8 2012
43 766579.46 81286 9 2012
44 685184.48 72783 10 2012
In [63]:
ax = sns.barplot(x="month", y="sum(total_amount)", hue="year", data=test)
In [64]:
sqlDF = spark.sql("SELECT * FROM trips WHERE pickup_datetime between '2011-02-01' and '2011-02-05' ")
sqlDF.show()
+--------------------+----------------+-----------------+-----------+---------------+------------+--------------------+---------------+----------------+---------+------------------+---------+----------+------------+------------+-------------+---------+
|    dropoff_datetime|dropoff_latitude|dropoff_longitude|fare_amount|passenger_count|payment_type|     pickup_datetime|pickup_latitude|pickup_longitude|rate_code|store_and_fwd_flag|surcharge|tip_amount|tolls_amount|total_amount|trip_distance|vendor_id|
+--------------------+----------------+-----------------+-----------+---------------+------------+--------------------+---------------+----------------+---------+------------------+---------+----------+------------+------------+-------------+---------+
|2011-02-04T13:45:...|       40.738353|       -74.002237|       12.1|              1|      Credit|2011-02-04T13:36:...|      40.763628|      -73.956142|     null|              null|      1.0|       3.0|         0.0|        16.1|          4.1|      VTS|
|2011-02-03T00:25:...|        40.74865|       -73.978213|        4.9|              1|        CASH|2011-02-03T00:18:...|      40.757912|      -73.987015|     null|              null|      0.0|       0.0|         0.0|         4.9|         0.85|      VTS|
|2011-02-04T17:10:...|        40.76764|       -73.955103|        6.9|              2|        CASH|2011-02-04T17:02:...|      40.748987|      -73.974662|     null|              null|      0.0|       0.0|         0.0|         6.9|         1.84|      VTS|
|2011-02-01T03:03:...|        40.81954|       -73.953983|       22.9|              5|        CASH|2011-02-01T02:49:...|      40.721385|      -74.004608|     null|              null|      1.0|       0.0|         0.0|        23.9|         8.63|      VTS|
|2011-02-01T04:38:...|       40.721187|       -74.010043|        8.9|              1|        CASH|2011-02-01T04:31:...|       40.73965|      -73.984692|     null|              null|      0.0|       0.0|         0.0|         8.9|         2.72|      VTS|
|2011-02-01T21:30:...|       40.752753|       -73.975333|        5.7|              5|        CASH|2011-02-01T21:17:...|      40.763102|      -73.967735|     null|              null|      1.0|       0.0|         0.0|         6.7|         0.89|      VTS|
|2011-02-04T07:46:...|       40.727866|        -73.98225|       12.5|              2|        CASH|2011-02-04T07:34:...|      40.764334|       -73.96896|     null|              null|      0.5|       0.0|         0.0|        13.0|          3.4|      DDS|
|2011-02-04T06:40:...|       40.741143|       -73.998067|        5.0|              1|        Cash|2011-02-04T06:31:...|      40.739177|      -74.006429|     null|              null|      0.0|       0.0|         0.0|         5.0|          0.8|      CMT|
|2011-02-04T02:07:...|       40.760225|       -73.973082|        6.6|              1|        Cash|2011-02-04T01:59:...|      40.772713|      -73.977549|     null|              null|      0.0|       0.0|         0.0|         6.6|          1.4|      CMT|
|2011-02-03T09:43:...|       40.738065|       -73.983586|        4.9|              1|        Cash|2011-02-03T09:35:...|       40.72777|      -73.983387|     null|              null|      0.0|       0.0|         0.0|         4.9|          0.9|      CMT|
|2011-02-04T11:20:...|        40.75304|       -73.987491|        6.9|              3|        Cash|2011-02-04T11:12:...|      40.738115|      -74.003779|     null|              null|      0.0|       0.0|         0.0|         6.9|          1.5|      CMT|
|2011-02-02T02:51:...|       40.720601|       -73.985222|        7.0|              1|        Cash|2011-02-02T02:42:...|      40.725551|      -74.004357|     null|              null|      0.0|       0.0|         0.0|         7.0|          1.3|      CMT|
|2011-02-03T02:46:...|       40.753529|       -73.977958|        5.3|              1|        Cash|2011-02-03T02:42:...|      40.759811|      -73.984697|     null|              null|      0.0|       0.0|         0.0|         5.3|          0.7|      CMT|
|2011-02-03T17:35:...|       40.851285|       -73.940091|       18.2|              1|        Cash|2011-02-03T17:28:...|      40.767269|      -73.983266|     null|              null|      0.0|       0.0|         0.0|        18.2|          7.3|      CMT|
|2011-02-03T08:42:...|       40.791748|       -73.968587|        8.1|              2|        CASH|2011-02-03T08:34:...|        40.7789|      -73.962533|     null|              null|      0.0|       0.0|         0.0|         8.1|         1.68|      VTS|
|2011-02-03T00:33:...|       40.780733|        -73.94612|       10.9|              1|        CASH|2011-02-03T00:25:...|      40.751407|      -73.982158|     null|              null|      1.0|       0.0|         0.0|        11.9|         3.66|      VTS|
|2011-02-01T07:26:...|       40.738602|       -74.002527|        5.3|              1|      Credit|2011-02-01T07:12:...|      40.721728|      -74.008325|     null|              null|      0.5|       1.5|         0.0|         7.3|          1.3|      VTS|
|2011-02-03T14:23:...|       40.763353|        -73.97936|        5.7|              1|        CASH|2011-02-03T14:12:...|       40.75584|       -73.97532|     null|              null|      1.0|       0.0|         0.0|         6.7|         0.79|      VTS|
|2011-02-04T20:08:...|       40.739775|       -73.998777|        8.9|              1|        CASH|2011-02-04T19:56:...|      40.759947|      -73.975647|     null|              null|      1.0|       0.0|         0.0|         9.9|         2.16|      VTS|
|2011-02-03T23:51:...|       40.780722|       -73.957058|        8.5|              5|        CASH|2011-02-03T23:42:...|      40.758972|      -73.972563|     null|              null|      0.5|       0.0|         0.0|         9.0|         2.42|      VTS|
+--------------------+----------------+-----------------+-----------+---------------+------------+--------------------+---------------+----------------+---------+------------------+---------+----------+------------+------------+-------------+---------+
only showing top 20 rows

In [88]:
test = spark.sql("SELECT concat(concat((case when length(DAY(pickup_datetime))<2 then concat('0', string(DAY(pickup_datetime))) else string(DAY(pickup_datetime)) end), '/'), (case when length(MONTH(pickup_datetime))<2 then concat('0', string(MONTH(pickup_datetime))) else string(MONTH(pickup_datetime)) end)) as dia, DAY(pickup_datetime) as dias, MONTH(pickup_datetime) as meses, sum(tip_amount) as gorjeta FROM trips WHERE YEAR(pickup_datetime) = 2012 and MONTH(pickup_datetime) > 7 group by meses, dias order by meses, dias").toPandas()
In [89]:
test
Out[89]:
dia dias meses gorjeta
0 01/08 1 8 1268.04
1 02/08 2 8 1473.60
2 03/08 3 8 1315.18
3 04/08 4 8 1314.96
4 05/08 5 8 1297.76
... ... ... ... ...
83 23/10 23 10 1420.38
84 24/10 24 10 1236.91
85 25/10 25 10 1253.87
86 26/10 26 10 1397.91
87 27/10 27 10 1217.40

88 rows × 4 columns

In [90]:
sns.lineplot(x="dia", y="gorjeta",data=test)
Out[90]:
<AxesSubplot:xlabel='dia', ylabel='gorjeta'>
In [109]:
from matplotlib import pyplot as plt
import seaborn as sns

plt.figure(figsize=(60,50))
plt.xlabel('xlabel', fontsize=50)
plt.ylabel('ylabel', fontsize=50)
plt.xticks(fontsize=25, rotation=45)
plt.yticks(fontsize=40)
ax = sns.lineplot(x="dia", y="gorjeta",data=test)
# ax.tick_params(axis='both', which='major', labelsize=30)
In [110]:
test
Out[110]:
dia dias meses gorjeta
0 01/08 1 8 1268.04
1 02/08 2 8 1473.60
2 03/08 3 8 1315.18
3 04/08 4 8 1314.96
4 05/08 5 8 1297.76
... ... ... ... ...
83 23/10 23 10 1420.38
84 24/10 24 10 1236.91
85 25/10 25 10 1253.87
86 26/10 26 10 1397.91
87 27/10 27 10 1217.40

88 rows × 4 columns

In [111]:
ax = sns.lineplot(x="dias", y="gorjeta",data=test, hue='meses')
In [4]:
#configuração da dimensões da tela de apresentação
from notebook.services.config import ConfigManager
cm = ConfigManager()

#defina as dimensões da apresentação.
cm.update('livereveal', {
              'width': 1000,
              'height': 600,
              'scroll': True,
         })
Out[4]:
{'width': 1000, 'height': 600, 'scroll': True}
In [6]:
!jupyter nbconvert teste.ipynb --to slides --post serve --template output_toggle.tpl
[NbConvertApp] Converting notebook teste.ipynb to slides
Traceback (most recent call last):
  File "C:\Users\Usuario\anaconda3\Scripts\jupyter-nbconvert-script.py", line 10, in <module>
    sys.exit(main())
  File "C:\Users\Usuario\anaconda3\lib\site-packages\jupyter_core\application.py", line 264, in launch_instance
    return super(JupyterApp, cls).launch_instance(argv=argv, **kwargs)
  File "C:\Users\Usuario\anaconda3\lib\site-packages\traitlets\config\application.py", line 846, in launch_instance
    app.start()
  File "C:\Users\Usuario\anaconda3\lib\site-packages\nbconvert\nbconvertapp.py", line 346, in start
    self.convert_notebooks()
  File "C:\Users\Usuario\anaconda3\lib\site-packages\nbconvert\nbconvertapp.py", line 518, in convert_notebooks
    self.convert_single_notebook(notebook_filename)
  File "C:\Users\Usuario\anaconda3\lib\site-packages\nbconvert\nbconvertapp.py", line 483, in convert_single_notebook
    output, resources = self.export_single_notebook(notebook_filename, resources, input_buffer=input_buffer)
  File "C:\Users\Usuario\anaconda3\lib\site-packages\nbconvert\nbconvertapp.py", line 412, in export_single_notebook
    output, resources = self.exporter.from_filename(notebook_filename, resources=resources)
  File "C:\Users\Usuario\anaconda3\lib\site-packages\nbconvert\exporters\exporter.py", line 181, in from_filename
    return self.from_file(f, resources=resources, **kw)
  File "C:\Users\Usuario\anaconda3\lib\site-packages\nbconvert\exporters\exporter.py", line 199, in from_file
    return self.from_notebook_node(nbformat.read(file_stream, as_version=4), resources=resources, **kw)
  File "C:\Users\Usuario\anaconda3\lib\site-packages\nbconvert\exporters\html.py", line 129, in from_notebook_node
    return super().from_notebook_node(nb, resources, **kw)
  File "C:\Users\Usuario\anaconda3\lib\site-packages\nbconvert\exporters\templateexporter.py", line 390, in from_notebook_node
    output = self.template.render(nb=nb_copy, resources=resources)
  File "C:\Users\Usuario\anaconda3\lib\site-packages\jinja2\environment.py", line 1090, in render
    self.environment.handle_exception()
  File "C:\Users\Usuario\anaconda3\lib\site-packages\jinja2\environment.py", line 832, in handle_exception
    reraise(*rewrite_traceback_stack(source=source))
  File "C:\Users\Usuario\anaconda3\lib\site-packages\jinja2\_compat.py", line 28, in reraise
    raise value.with_traceback(tb)
  File "C:\Users\Usuario\output_toggle.tpl", line 5, in top-level template code
    {%- extends 'slides_reveal.tpl' -%}
jinja2.exceptions.TemplateNotFound: slides_reveal.tpl
In [ ]: